# Purpose: Create table that shows Jaccard index, PA labels, and top k keywords
# for each topic from the best performing model.
#
# The workspace is deliberately not cleared here: rm(list = ls()) would delete
# the caller's objects whenever this script is source()d. Run the script in a
# fresh R session if a clean environment is needed.

### PATHS ##############################
in.file <- "./data/best_model_matches_DTM_model_22_topics_top_15_words.csv"
#######################################

# Read the topic/label matches, keeping text columns as character vectors.
data <- read.csv(in.file, stringsAsFactors = FALSE)

# Fail early with a readable message: a missing column would otherwise make
# order() return integer(0) and silently reduce the table to zero rows.
required.cols <- c("jaccard.value", "dtm.topwords", "pa.label.jaccard")
missing.cols <- setdiff(required.cols, names(data))
if (length(missing.cols) > 0) {
  stop(
    "Column(s) missing from ", in.file, ": ",
    paste(missing.cols, collapse = ", "),
    call. = FALSE
  )
}

# Best-matching topics (highest Jaccard value) first.
data <- data[order(data$jaccard.value, decreasing = TRUE), ]

# Number of leading keywords to keep for each topic.
k <- 6

# dtm.topwords stores a topic's keywords as one space-separated string; keep
# only the first k of them. head() is used instead of x[1:k] so that a topic
# with fewer than k keywords is not padded with literal "NA" tokens, and
# vapply() guarantees a character vector even when `data` has no rows.
data$top.words <- vapply(
  strsplit(data$dtm.topwords, " ", fixed = TRUE),
  function(words) paste(head(words, k), collapse = " "),
  character(1),
  USE.NAMES = FALSE
)

# nicer labels
# Lookup table: raw lower-case label (name) -> display label (value).
pa.label.map <- c(
  "macroeconomics" = "Macroeconomics",
  "civil rights and minority issues" = "Civil Rights and Minority Issues",
  "health" = "Health",
  "agriculture" = "Agriculture",
  "labor and employment" = "Labor and Employment",
  "education and culture" = "Education and Culture",
  "environment" = "Environment",
  "energy" = "Energy",
  "transportation" = "Transportation",
  "law crime and family issues" = "Law, Crime, and Family Issues",
  "social welfare" = "Social Welfare",
  "community development planning and housing" = "Community Development, Planning and Housing",
  "banking finance and domestic commerce" = "Banking, Finance, and Domestic Commerce",
  "defense" = "Defense",
  "space science technology and communications" = "Space, Science, Technology and Communications",
  "foreign trade" = "Foreign Trade",
  "international affairs and foreign aid" = "International Affairs and Foreign Aid",
  "government operations" = "Government Operations",
  "public lands water management colonial" = "Public Lands, Water Management, Colonial"
)

data$pa.label.jaccard <- factor(data$pa.label.jaccard)

# Rename every level that has an entry in the lookup table; levels without an
# entry are left exactly as they are, and the level order is unchanged.
label.levels <- levels(data$pa.label.jaccard)
map.index <- match(label.levels, names(pa.label.map))
has.nice.label <- !is.na(map.index)
label.levels[has.nice.label] <- unname(pa.label.map[map.index[has.nice.label]])
levels(data$pa.label.jaccard) <- label.levels


# latex table
# Write one LaTeX table row per topic: Jaccard value & PA label & top keywords.
# writeLines() emits the raw text, so the rows can be pasted into a tabular
# environment as-is and are also produced when the script is source()d; a bare
# paste() is only auto-printed, with [n] indices, quotes and escaped
# backslashes. The R literal "\\\\[0.1em]" is the LaTeX row terminator
# \\[0.1em] (line break plus 0.1em of extra vertical space).
writeLines(
  paste(
    data$jaccard.value, "&",
    data$pa.label.jaccard, "&",
    data$top.words, "\\\\[0.1em]"
  )
)

